import requests
import wordcloud
import re 
import pprint
from tkinter import *

# Sample video page address. No function visible in this file reads it; the
# GUI takes the address from the entry widget instead.
video_url = "https://www.bilibili.com/video/BV1ep4y1s7xG?t=66"


# HTTP request headers passed to every requests.get() call in this file.
# NOTE(review): the cookie embeds what look like personal session credentials
# (SESSDATA, bili_jct) — confirm they are revoked and consider loading the
# value from the environment instead of committing it.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
    "referer": "https://www.bilibili.com",
    "cookie": "_uuid=6185ADAD-E351-4BC4-766E-0B38AF97020786236infoc; buvid3=4990DC29-6DB5-40B2-A10B-6EB9D9AF8F2B155832infoc; sid=d80bw00y; rpdid=|(umRkJu|m~)0J'ulmJl~JuY|; CURRENT_QUALITY=80; LIVE_BUVID=AUTO6915954949231230; blackside_state=1; CURRENT_FNVAL=80; fingerprint=98c99d00d54fe004b372b6bda52ab256; buvid_fp=4990DC29-6DB5-40B2-A10B-6EB9D9AF8F2B155832infoc; buvid_fp_plain=4990DC29-6DB5-40B2-A10B-6EB9D9AF8F2B155832infoc; DedeUserID=431943900; DedeUserID__ckMd5=cd3a9705c2aaafad; SESSDATA=e0703c30%2C1629442221%2Cf30c4*21; bili_jct=64f53090c52cf878f430e6cb30d8c2fb; bsource=search_360; finger=1571944565; bp_video_offset_431943900=494159708514409608; PVID=3",
}




def openVideoUrl(url, headers, timeout=10):
    """Download a video page and return its HTML source.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers sent with the GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely, which would freeze
            the GUI callback that calls this function.

    Returns:
        The response body as text, decoded by ``requests``.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail here rather than handing an error page on as if it were the video page.
    response.raise_for_status()
    return response.text

def findCid(html):
    """Return the first numeric cid found in a page's HTML source.

    The page is searched for the first ``cid=`` that is immediately followed
    by at least one digit. Requiring a digit means a bare ``cid=`` earlier in
    the page is skipped rather than returned as an empty id.

    Args:
        html: Page source text to search.

    Returns:
        The digits following ``cid=``, as a string.

    Raises:
        ValueError: If the page contains no ``cid=<digits>`` token.
    """
    match = re.search(r'cid=([0-9]+)', html)
    if match is None:
        raise ValueError("no 'cid=<digits>' token found in the page source")
    return match.group(1)

def openBarrageUrl(cid, headers, timeout=10):
    """Download the danmaku (bullet-comment) XML for a cid and extract its text.

    The raw XML is also saved to ``弹幕.xml`` in the current working
    directory, overwriting any previous copy.

    Args:
        cid: The cid number; it is substituted into the comment XML address.
        headers: HTTP request headers sent with the GET request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Every run of characters in the range U+4E00..U+9FA5 found in the XML,
        joined by single spaces. The result is an empty string when there are
        none. The text is joined rather than returned as the ``repr`` of a
        list, so no brackets, quotes or commas leak into the word-cloud input.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    barrage_url = "https://comment.bilibili.com/{}.xml".format(cid)
    response = requests.get(url=barrage_url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # Decode the raw bytes explicitly as UTF-8 instead of relying on the
    # encoding requests would pick for ``response.text``.
    xml = response.content.decode('utf-8')

    with open(r"弹幕.xml", 'w', encoding='utf-8') as f:
        f.write(xml)

    fragments = re.findall('[\u4e00-\u9fa5]+', xml)
    return ' '.join(fragments)
    

def genWordCloud(dm_text, font_path=r"C:\Windows\Fonts\simhei.ttf",
                 width=800, height=600):
    """Render a word cloud from text and open it in the default image viewer.

    Args:
        dm_text: The text to visualise, as a single string.
        font_path: Font file used to draw the words. The default is the
            SimHei font in the Windows fonts folder; pass another path on
            systems where that file does not exist.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        None. The image is only displayed, not saved or returned.
    """
    # A lone apostrophe is excluded so it is never drawn as a word.
    stop_words = {"'"}
    wc = wordcloud.WordCloud(
        font_path=font_path,
        width=width,
        height=height,
        stopwords=stop_words,
    )
    wc.generate(dm_text)
    wc.to_image().show()

def showtime():
    """Button callback: build and display the word cloud for the entered URL.

    Reads the address from the module-level ``input_lable`` entry widget,
    downloads the video page, extracts the cid, downloads the danmaku text
    and renders the word cloud.

    Failures are shown in an error dialog. An exception escaping a Tk
    callback is otherwise only printed as a traceback on stderr, which the
    user of the GUI may never see.
    """
    # messagebox is a tkinter submodule and is not brought in by
    # ``from tkinter import *``, so it is imported here.
    from tkinter import messagebox

    # Pasted addresses often carry stray whitespace or a trailing newline.
    AV_url = input_lable.get().strip()
    try:
        html = openVideoUrl(url=AV_url, headers=headers)
        cid = findCid(html=html)
        dm_data = openBarrageUrl(cid=cid, headers=headers)
        genWordCloud(dm_data)
    except (requests.RequestException, LookupError, ValueError, OSError) as err:
        # Network/HTTP errors, a page without a cid, undecodable or empty
        # danmaku text, and a missing font file all end up here.
        messagebox.showerror("错误", "生成词云图片失败：{}".format(err))
    
if __name__ == "__main__":
    # Fixed-size top-level window.
    window = Tk()
    window.resizable(0, 0)
    window.title("bilibili视频弹幕词云图生成器")

    # URL entry, pre-filled with a hint. showtime() reads this widget through
    # its module-level name, so the identifier must stay exactly as spelled.
    input_lable = Entry(window, bg='skyblue', fg='white', width='25')
    input_lable.pack()
    input_lable.insert(0, "输入视频网址")

    # Clicking the button runs the whole download-and-render pipeline.
    button = Button(
        window,
        command=showtime,
        foreground='blue',
        text="生成词云图片",
    )
    button.pack()

    window.mainloop()